Using maps

library(raster) # raster handling (needed for relief)
## Loading required package: sp
library(viridis) # viridis color scale
## Loading required package: viridisLite
library(cowplot) # stack ggplots
library(colorspace) #nicer color scales
## 
## Attaching package: 'colorspace'
## The following object is masked from 'package:raster':
## 
##     RGB
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract()   masks raster::extract()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::select()    masks raster::select()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(rworldmap)
## ### Welcome to rworldmap ###
## For a short introduction type :   vignette('rworldmap')
# set default figure parameters for knitr
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
wmap <- rworldmap::getMap(resolution = "low")  %>%
  st_as_sf()

ggplot(data = wmap) +
  geom_sf(aes(fill = NAME)) +
  theme_minimal() +
  guides(fill = FALSE) # don't show legend

wmap_ant <- getMap()[-which(getMap()$ADMIN=='Antarctica'),]  %>%
  st_as_sf()
ggplot(data = wmap_ant) +
  geom_sf(aes(fill = NAME)) +
  theme_minimal() +
  guides(fill = FALSE) # don't show legend

Raster maps

Simple example: https://ggplot2.tidyverse.org/reference/ggsf.html

nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)
ggplot(nc) +
  geom_sf(aes(fill = AREA))

# If not supplied, coord_sf() will take the CRS from the first layer
# and automatically transform all other layers to use that CRS. This
# ensures that all data will correctly line up
nc_3857 <- sf::st_transform(nc, 3857)
ggplot() +
  geom_sf(data = nc) +
  geom_sf(data = nc_3857, colour = "red", fill = NA)

# Unfortunately if you plot other types of feature you'll need to use
# show.legend to tell ggplot2 what type of legend to use
nc_3857$mid <- sf::st_centroid(nc_3857$geometry)
ggplot(nc_3857) +
  geom_sf(colour = "white") +
  geom_sf(aes(geometry = mid, size = AREA), show.legend = "point")

# You can also use layers with x and y aesthetics. To have these interpreted
# as longitude/latitude you need to set the default CRS in coord_sf()
ggplot(nc_3857) +
  geom_sf() +
  annotate("point", x = -80, y = 35, colour = "red", size = 4) +
  coord_sf(default_crs = sf::st_crs(4326))

# To add labels, use geom_sf_label().
ggplot(nc_3857[1:3, ]) +
   geom_sf(aes(fill = AREA)) +
   geom_sf_label(aes(label = NAME))

Illinois example

il_counties <- map_data("county", "illinois") 
head(il_counties)
##        long      lat group order   region subregion
## 1 -91.49563 40.21018     1     1 illinois     adams
## 2 -90.91121 40.19299     1     2 illinois     adams
## 3 -90.91121 40.19299     1     3 illinois     adams
## 4 -90.91121 40.10704     1     4 illinois     adams
## 5 -90.91121 39.83775     1     5 illinois     adams
## 6 -90.91694 39.75754     1     6 illinois     adams
ggplot(il_counties, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", colour = "grey50") + 
  coord_quickmap()

il_county <- tigris::counties(state = "illinois", cb = TRUE) %>% 
    st_as_sf() 
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |============================================                          |  64%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |======================================================================| 100%
ggplot(il_county) + 
  geom_sf(color = "blue", fill = "lightgray") 

Add cities

Data source: https://geo-csv.com/illinois/

illinois <- read_csv("../data/illinois.csv")
ggplot(il_county) + 
  geom_sf(color = "blue", fill = "gray") +
 geom_point(data = illinois, 
            aes(x = city_longitude, y = city_latitude), 
            colour = "darkblue", size = 0.1, alpha = 0.4) + 
  coord_sf() 

# bring in county pop + cities
pop <- read_csv("../data/dceo_county_pop.csv")

# merge by county
pop_all <- pop %>% filter(`Age Group` == "All" & Race == "All") %>% mutate(NAME = `State/County`)
county_pop <- full_join(il_county, pop_all, by="NAME")
  
# plot
ggplot() + # set up basic plot (don't specify data b/c using two data sources)
 geom_sf(data = county_pop, aes(fill = `2005`)) + # setup our county data
 scale_fill_continuous_sequential(palette = "viridis", rev = FALSE) +
  coord_sf() 

#try breaks
ggplot() + # set up basic plot (don't specify data b/c using two data sources)
 geom_sf(data = county_pop, aes(fill = `2005`)) + # setup our county data
 scale_fill_binned_sequential(
    palette = "viridis",
    rev = FALSE,
    aesthetics = c("fill", "color"),
    n.breaks = 3, 
  ) +
  labs(fill="2005 population") + 
  coord_sf() 

Doesn’t look too great – let’s try logging

#try logging
ggplot() + # set up basic plot (don't specify data b/c using two data sources)
 geom_sf(data = county_pop, aes(fill = `2005`)) + # setup our county data
 scale_fill_continuous_sequential(palette = "viridis", rev = FALSE, trans = "log", ) +
  labs(fill="2005 population") + 
  coord_sf() 

# plot
ggplot() + # set up basic plot (don't specify data b/c using two data sources)
 geom_sf(data = county_pop, aes(fill = `2005`)) + # setup our county data
 scale_fill_continuous_sequential(palette = "viridis", rev = FALSE, trans = "log", ) +
  labs(fill="2005 population") + 
 geom_point(data = illinois,  # bringing in our city data
            aes(x = city_longitude, y = city_latitude), 
            colour = "white", size = 0.01, alpha = 0.2) + # room to improve aesthetically
  coord_sf() 

ggsave("il_counties_cities.png")

Switzerland

Source: https://timogrossenbacher.ch/bivariate-maps-with-ggplot2-and-sf/ and https://timogrossenbacher.ch/bivariate-maps-with-ggplot2-and-sf/

I did not create any of this, just FYI! it’s a streamlined version from the source website and repo

# read cantonal borders
canton_geo <- read_sf("../data/input/g2k15.shp")

# read country borders
country_geo <- read_sf("../data/input/g2l15.shp")

# read lakes
lake_geo <- read_sf("../data/input/g2s15.shp")

# read productive area (2324 municipalities)
municipality_prod_geo <- read_sf("../data/input/gde-1-1-15.shp")

# read in raster of relief
relief <- raster("../data/input/02-relief-ascii.asc") %>%
  # hide relief outside of Switzerland by masking with country borders
  mask(country_geo) %>%
  as("SpatialPixelsDataFrame") %>%
  as.data.frame() %>%
  rename(value = `X02.relief.ascii`)

# clean up
rm(country_geo)
data <- read_csv("../data/input/data.csv")

# create 3 buckets for gini
quantiles_gini <- data %>%
  pull(gini) %>%
  quantile(probs = seq(0, 1, length.out = 4))

# create 3 buckets for mean income
quantiles_mean <- data %>%
  pull(mean) %>%
  quantile(probs = seq(0, 1, length.out = 4))

Join Color Codes to the Data

Here the municipalities are put into the appropriate class corresponding to their average income and income (in-)equality.

# cut into groups defined above and join fill
municipality_prod_geo <- municipality_prod_geo %>%
  left_join(data, by = c("BFS_ID" = "bfs_id"))
class(municipality_prod_geo)
## [1] "sf"         "tbl_df"     "tbl"        "data.frame"
# pretyfiying labels
# define number of classes
no_classes <- 6

# extract quantiles
quantiles <- municipality_prod_geo %>%
  pull(mean) %>%
  quantile(probs = seq(0, 1, length.out = no_classes + 1)) %>%
  as.vector() # to remove names of quantiles, so idx below is numeric

# here we create custom labels
labels <- imap_chr(quantiles, function(., idx){
  return(paste0(round(quantiles[idx] / 1000, 0),
                             "k",
                             " – ",
                             round(quantiles[idx + 1] / 1000, 0),
                             "k"))
})

# we need to remove the last label 
# because that would be something like "478k - NA"
labels <- labels[1:length(labels) - 1]

municipality_prod_geo <- municipality_prod_geo %>%
  mutate(mean_quantiles = cut(mean,
                               breaks = quantiles,
                               labels = labels,
                               include.lowest = T))
ggplot(
  # define main data source
  data = municipality_prod_geo
) +
    # raster comes as the first layer, municipalities on top
    geom_raster(
      data = relief, aes( x = x,   y = y,  alpha = value  ) ) +
    # use the "alpha hack"
    scale_alpha(name = "", range = c(0.6, 0), guide = "none")  + 
  geom_sf(
    mapping = aes(
      fill = mean_quantiles
      ),
    color = "white",
    size = 0.1
  ) +
  # use the Viridis color scale
  scale_fill_viridis(
    option = "magma",
    name = "Average\nincome in CHF",
    alpha = 0.8, # make fill a bit brighter
    begin = 0.1, # this option seems to be new (compared to 2016):
    # with this we can truncate the
    # color scale, so that extreme colors (very dark and very bright) are not
    # used, which makes the map a bit more aesthetic
    end = 0.9,
    discrete = T, # discrete classes, thus guide_legend instead of _colorbar
    direction = 1, # dark is lowest, yellow is highest
    guide = guide_legend(
     keyheight = unit(5, units = "mm"),
     title.position = "top",
     reverse = T # display highest income on top
  )) +
  geom_sf(
    data = canton_geo,
    fill = "transparent",
    color = "white",
    size = 0.5
  )+
  # draw lakes in light blue
  geom_sf(
    data = lake_geo,
    fill = "#D6F1FF",
    color = "transparent"
  ) +
  # add titles
  labs(x = NULL,
         y = NULL,
         title = "Switzerland's regional income (in-)equality",
         subtitle = paste0("Average yearly income and income",
                           " (in-)equality in Swiss municipalities, 2015"))

Making it fancy

# create color scale that encodes two variables
# red for gini and blue for mean income
# the special notation with gather is due to readibility reasons
bivariate_color_scale <- tibble(
  "3 - 3" = "#3F2949", # high inequality, high income
  "2 - 3" = "#435786",
  "1 - 3" = "#4885C1", # low inequality, high income
  "3 - 2" = "#77324C",
  "2 - 2" = "#806A8A", # medium inequality, medium income
  "1 - 2" = "#89A1C8",
  "3 - 1" = "#AE3A4E", # high inequality, low income
  "2 - 1" = "#BC7C8F",
  "1 - 1" = "#CABED0" # low inequality, low income
) %>%
  gather("group", "fill")

municipality_prod_geo <- municipality_prod_geo %>%
  mutate(
    gini_quantiles = cut(
      gini,
      breaks = quantiles_gini,
      include.lowest = TRUE
    ),
    mean_quantiles = cut(
      mean,
      breaks = quantiles_mean,
      include.lowest = TRUE
    ),
    # by pasting the factors together as numbers we match the groups defined
    # in the tibble bivariate_color_scale
    group = paste(
      as.numeric(gini_quantiles), "-",
      as.numeric(mean_quantiles)
    )
  ) %>%
  # we now join the actual hex values per "group"
  # so each municipality knows its hex value based on the his gini and avg
  # income value
  left_join(bivariate_color_scale, by = "group")


map <- ggplot(
  # use the same dataset as before
  data = municipality_prod_geo
  ) +
  # first: draw the relief
  geom_raster(
    data = relief,
    aes(
      x = x,
      y = y,
      alpha = value
    )
  ) +
  # use the "alpha hack" (as the "fill" aesthetic is already taken)
  scale_alpha(name = "",
              range = c(0.6, 0),
              guide = F) + # suppress legend
  # color municipalities according to their gini / income combination
  geom_sf(
    aes(
      fill = fill
    ),
    # use thin white stroke for municipalities
    color = "white",
    size = 0.1
  ) +
  # as the sf object municipality_prod_geo has a column with name "fill" that
  # contains the literal color as hex code for each municipality, we can use
  # scale_fill_identity here
  scale_fill_identity() +
  # use thicker white stroke for cantons
  geom_sf(
    data = canton_geo,
    fill = "transparent",
    color = "white",
    size = 0.5
  ) +
  # draw lakes in light blue
  geom_sf(
    data = lake_geo,
    fill = "#D6F1FF",
    color = "transparent"
  ) +
  # add titles
  labs(x = NULL,
         y = NULL,
         title = "Switzerland's regional income (in-)equality",
         subtitle = paste0("Average yearly income and income",
                           " (in-)equality in Swiss municipalities, 2015")) +
  # add the theme
  theme_map()
# separate the groups
bivariate_color_scale <- bivariate_color_scale %>%
  separate(group, into = c("gini", "mean"), sep = " - ") %>%
  mutate(gini = as.integer(gini),
         mean = as.integer(mean))

legend <- ggplot() +
  geom_tile(
    data = bivariate_color_scale,
    mapping = aes(
      x = gini,
      y = mean,
      fill = fill)
  ) +
  scale_fill_identity() +
  labs(x = "Higher inequality",
       y = "Higher income") +
  theme_map() +
  # make font small enough
  theme(
    axis.title = element_text(size = 6)
  ) +
  # quadratic tiles
  coord_fixed()

bringing it all together!

ggdraw() +
  draw_plot(map, 0, 0, 1, 1) +
  draw_plot(legend, 0.05, 0.075, 0.2, 0.2)

Advanced: annotations

Advanced: adding in annocations

annotations <- tibble(
  label = c(
    "Grey areas mean\nlow income and\nlow inequality",
    "Blue areas mean\nhigh income and\nlow inequality",
    "Violet areas mean\nhigh income and\nhigh inequality",
    "Red areas mean\nlow income and\nhigh inequality"
  ),
  arrow_from = c(
    "548921,232972", # grey
    "771356,238335", # blue
    "781136,125067", # violet
    "616348,81869" # red
  ),
  arrow_to = c(
    "622435,206784", # grey
    "712671,261998", # blue
    "786229,149597", # violet
    "602334,122674" # red
  ),
  curvature = c(
    0.2, # grey
    0.1, # blue
    -0.1, # violet
    -0.2 # red
  ),
  nudge = c(
    "-3000,0", # grey
    "3000,5000", # blue
    "0,-5000", # violet
    "3000,0" # red
  ),
  just = c(
    "1,0", # grey
    "0,1", # blue
    "0.5,1", # violet
    "0,1" # red
  )
) %>%
  separate(arrow_from, into = c("x", "y")) %>%
  separate(arrow_to, into = c("xend", "yend")) %>%
  separate(nudge, into = c("nudge_x", "nudge_y"), sep = "\\,") %>%
  separate(just, into = c("hjust", "vjust"), sep = "\\,")
map_a <- ggplot(
  # use the same dataset as before
  data = municipality_prod_geo
  ) +
  # first: draw the relief
  geom_raster(
    data = relief,
    aes(
      x = x,
      y = y,
      alpha = value
    )
  ) +
  # use the "alpha hack" (as the "fill" aesthetic is already taken)
  scale_alpha(name = "",
              range = c(0.6, 0),
              guide = F) + # suppress legend
  # color municipalities according to their gini / income combination
  geom_sf(
    aes(
      fill = fill
    ),
    # use thin white stroke for municipalities
    color = "white",
    size = 0.1
  ) +
  # as the sf object municipality_prod_geo has a column with name "fill" that
  # contains the literal color as hex code for each municipality, we can use
  # scale_fill_identity here
  scale_fill_identity() +
  # use thicker white stroke for cantons
  geom_sf(
    data = canton_geo,
    fill = "transparent",
    color = "white",
    size = 0.5
  ) +
  # draw lakes in light blue
  geom_sf(
    data = lake_geo,
    fill = "#D6F1FF",
    color = "transparent"
  ) +
  # add titles
  labs(x = NULL,
         y = NULL,
         title = "Switzerland's regional income (in-)equality",
         subtitle = paste0("Average yearly income and income",
                           " (in-)equality in Swiss municipalities, 2015")) +
  # add the theme
  theme_map() 

# add annotations one by one by walking over the annotations data frame
# this is necessary because we cannot define nudge_x, nudge_y and curvature
# in the aes in a data-driven way like as with x and y, for example
annotations %>%
  pwalk(function(...) {
    # collect all values in the row in a one-rowed data frame
    current <- tibble(...)

    # convert all columns from x to vjust to numeric
    # as pwalk apparently turns everything into a character (why???)
    current <- current %>%
      mutate_at(vars(x:vjust), as.numeric)

    # update the plot object with global assignment
    map_a <<- map_a +
      # for each annotation, add an arrow
      geom_curve(
        data = current,
        aes(
          x = x,
          xend = xend,
          y = y,
          yend = yend
        ),
        # that's the whole point of doing this loop:
        curvature = current %>% pull(curvature),
        size = 0.2,
        arrow = arrow(
          length = unit(0.005, "npc")
        )
      ) +
      # for each annotation, add a label
      geom_text(
        data = current,
        aes(
          x = x,
          y = y,
          label = label,
          hjust = hjust,
          vjust = vjust
        ),
        # that's the whole point of doing this loop:
        nudge_x = current %>% pull(nudge_x),
        nudge_y = current %>% pull(nudge_y),
        color = "black",
        size = 3
      )
  })
ggdraw() +
  draw_plot(map_a, 0, 0, 1, 1) +
  draw_plot(legend, 0.05, 0.075, 0.2, 0.2)

ggsave("switzerland.png")